###############################################################################
# AZ cleaning
#
# written may 8 2024
#   sbaltz at mit
###############################################################################

import pandas as pd

RAW_FILE_NAME = '../transcribed/2020_general_greenlee_hand_count.csv'
SAVE_FILE_NAME = '../ready/greenlee_cleaned.csv'
COUNTY_NAME = 'GREENLEE'

# Values that are identical on every output row.
STATE_NAME = 'ARIZONA'
COUNT_METHOD = 'MANUAL'

# Column order of the cleaned output file. The counting method is written
# under the header 'type'.
OUTPUT_COLUMNS = [
    'state',
    'county',
    'precinct',
    'batch',
    'office',
    'candidate',
    'original',
    'audited',
    'difference',
    'mode',
    'type',
]

# Markers that identify header rows in the transcribed sheet.
VOTE_CENTER_PREFIX = 'Vote Center Counted: '
BATCH_PREFIX = 'Batch Counted: '
RACE_PREFIX = 'Race: '

ct = pd.read_csv(RAW_FILE_NAME)

# Context carried forward from the most recent header rows. None means
# "no such header has been seen yet".
precinct = None
batch = None
mode = None
office = None

# Collect the output rows in a plain list and build the DataFrame once at the
# end; concatenating a one-row DataFrame per iteration copies the whole frame
# every time.
rows = []

for i, (candidate_cell, original_cell, audited_cell) in enumerate(
        zip(ct['candidate'], ct['original'], ct['audited'])):
    candidate = str(candidate_cell)
    original_text = str(original_cell)
    audited_text = str(audited_cell)

    # Location headers live in the 'candidate' column. The two checks are
    # deliberately independent (not elif): if both markers occur in one cell,
    # the batch interpretation is applied last.
    if VOTE_CENTER_PREFIX in candidate:
        precinct = candidate.replace(VOTE_CENTER_PREFIX, '')
        #In AZ, a Vote Center is a physical election-day voting location
        mode = 'ELECTION DAY'
        batch = ''
    if BATCH_PREFIX in candidate:
        batch = candidate.replace(BATCH_PREFIX, '').replace('#', '')
        precinct = ''
        mode = 'EARLY'

    # Race headers live in the 'original' column; cells that also mention
    # 'Category' are not treated as a race name.
    if RACE_PREFIX in original_text and 'Category' not in original_text:
        office = original_text.replace(RACE_PREFIX, '').strip()

    # A data row has something in both count columns (missing cells read by
    # pandas stringify to "nan").
    if original_text == 'nan' or audited_text == 'nan':
        continue

    # Best-effort parsing: a row whose counts are not integers (for example a
    # header row that happens to have both cells filled) is reported and
    # skipped rather than aborting the whole run.
    try:
        original = int(original_cell)
        audited = int(audited_cell)
        if 'Totals' not in candidate:
            if mode is None or office is None:
                raise ValueError(
                    'data row appears before any race or location header')
            rows.append([
                STATE_NAME,
                COUNTY_NAME,
                precinct,
                batch,
                office,
                candidate,
                original,
                audited,
                audited - original,
                mode,
                COUNT_METHOD,
            ])
    except Exception as inst:
        print(f"ROW {i} NOT SAVED: ", inst)

# Passing the column names to the constructor keeps this working (and yields
# a header-only file) even when no rows were kept.
cleaned = pd.DataFrame(rows, columns=OUTPUT_COLUMNS)

cleaned.to_csv(SAVE_FILE_NAME, index=False)
